library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(ggplot2)
library(skimr)
## Warning: package 'skimr' was built under R version 4.1.2
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.1.2
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(IRdisplay)
## Warning: package 'IRdisplay' was built under R version 4.1.2
#setwd("C:/Users/18572/Documents/Data Visualization/Assignment/HW6")
#df <- read.csv(file="C:/Users/18572/Documents/Data Visualization/HW6/marketing_campaign.csv")
main_df <- read.csv(file="marketing_campaign.csv", sep = "\t", header = T)
#plot_intro(main_df,title='Dataset')
head(main_df)
skim(main_df)
Data summary
Name main_df
Number of rows 2240
Number of columns 29
_______________________
Column type frequency:
character 3
numeric 26
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Education 0 1 3 10 0 5 0
Marital_Status 0 1 4 8 0 8 0
Dt_Customer 0 1 10 10 0 663 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ID 0 1.00 5592.16 3246.66 0 2828.25 5458.5 8427.75 11191 ▇▇▇▇▇
Year_Birth 0 1.00 1968.81 11.98 1893 1959.00 1970.0 1977.00 1996 ▁▁▂▇▅
Income 24 0.99 52247.25 25173.08 1730 35303.00 51381.5 68522.00 666666 ▇▁▁▁▁
Kidhome 0 1.00 0.44 0.54 0 0.00 0.0 1.00 2 ▇▁▆▁▁
Teenhome 0 1.00 0.51 0.54 0 0.00 0.0 1.00 2 ▇▁▇▁▁
Recency 0 1.00 49.11 28.96 0 24.00 49.0 74.00 99 ▇▇▇▇▇
MntWines 0 1.00 303.94 336.60 0 23.75 173.5 504.25 1493 ▇▂▂▁▁
MntFruits 0 1.00 26.30 39.77 0 1.00 8.0 33.00 199 ▇▁▁▁▁
MntMeatProducts 0 1.00 166.95 225.72 0 16.00 67.0 232.00 1725 ▇▁▁▁▁
MntFishProducts 0 1.00 37.53 54.63 0 3.00 12.0 50.00 259 ▇▁▁▁▁
MntSweetProducts 0 1.00 27.06 41.28 0 1.00 8.0 33.00 263 ▇▁▁▁▁
MntGoldProds 0 1.00 44.02 52.17 0 9.00 24.0 56.00 362 ▇▁▁▁▁
NumDealsPurchases 0 1.00 2.33 1.93 0 1.00 2.0 3.00 15 ▇▂▁▁▁
NumWebPurchases 0 1.00 4.08 2.78 0 2.00 4.0 6.00 27 ▇▃▁▁▁
NumCatalogPurchases 0 1.00 2.66 2.92 0 0.00 2.0 4.00 28 ▇▂▁▁▁
NumStorePurchases 0 1.00 5.79 3.25 0 3.00 5.0 8.00 13 ▂▇▂▃▂
NumWebVisitsMonth 0 1.00 5.32 2.43 0 3.00 6.0 7.00 20 ▅▇▁▁▁
AcceptedCmp3 0 1.00 0.07 0.26 0 0.00 0.0 0.00 1 ▇▁▁▁▁
AcceptedCmp4 0 1.00 0.07 0.26 0 0.00 0.0 0.00 1 ▇▁▁▁▁
AcceptedCmp5 0 1.00 0.07 0.26 0 0.00 0.0 0.00 1 ▇▁▁▁▁
AcceptedCmp1 0 1.00 0.06 0.25 0 0.00 0.0 0.00 1 ▇▁▁▁▁
AcceptedCmp2 0 1.00 0.01 0.11 0 0.00 0.0 0.00 1 ▇▁▁▁▁
Complain 0 1.00 0.01 0.10 0 0.00 0.0 0.00 1 ▇▁▁▁▁
Z_CostContact 0 1.00 3.00 0.00 3 3.00 3.0 3.00 3 ▁▁▇▁▁
Z_Revenue 0 1.00 11.00 0.00 11 11.00 11.0 11.00 11 ▁▁▇▁▁
Response 0 1.00 0.15 0.36 0 0.00 0.0 0.00 1 ▇▁▁▁▂
plot_bar(main_df,ncol=2) 
## 1 columns ignored with more than 50 categories.
## Dt_Customer: 663 categories

#Filling the missing value with median value
main_df$Income[is.na(main_df$Income)]=median(main_df$Income,na.rm=T)
#Calculating the age for each customer as a vector
current_date= Sys.Date()
current_year = format(current_date,format="%Y")
current_year = as.integer(current_year)
Age= c(current_year - main_df$Year_Birth)
Age
##    [1]  64  67  56  37  40  54  50  36  47  71  38  45  62  69  34  75  41  75
##   [19]  72  36  39  42  72  67  70  52  45  35  32  56  32  58  51  69  75  70
##   [37]  51  45  48  78  41  36  64  62  46  45  25  53  70  67  64  57  44  44
##   [55]  43  66  55  33  39  53  39  46  69  70  73  50  54  42  63  51  67  48
##   [73]  62  46  69  44  49  45  45  56  36  52  48  58  34  61  51  64  48  54
##   [91]  64  64  48  49  51  38  61  49  70  38  61  62  42  76  62  42  57  52
##  [109]  43  61  42  30  58  30  41  59  51  43  54  52  68  54  36  45  38  56
##  [127]  37  68  60  65  69  63  52  58  57  54  45  62  48  56  29  45  59  47
##  [145]  33  45  46  40  48  55  68  52  49  33  69  72  63  43  44  66  63  76
##  [163]  37  46  48  55  68  62  33  64  56  39  51  62  54  46  73  51  62  70
##  [181]  45  33  75  37  60  38  51  55  45  57  43  32 121  33  48  44  41  67
##  [199]  67  48  69  53  43  44  56  37  33  58  55  67  65  67  53  47  36  44
##  [217]  43  73  67  41  33  52  51  41  59  43  45  69  49  35  55  59  50  70
##  [235]  56  46  39  63  48 128  53  63  50  31  63  57  49  68  33  52  46  36
##  [253]  47  63  54  47  60  57  57  64  42  53  51  43  69  55  58  51  51  51
##  [271]  68  38  46  61  58  58  65  40  35  53  40  45  75  47  46  47  69  65
##  [289]  55  72  37  52  41  50  56  69  33  35  39  56  63  65  43  33  37  60
##  [307]  55  74  57  29  45  54  32  45  55  65  49  36  70  51  50  47  56  60
##  [325]  68  64  45  37  71  35  38  32  65  50  38  61  53  49  56 122  33  35
##  [343]  52  71  48  69  56  51  50  45  64  35  55  47  45  43  66  52  78  73
##  [361]  64  52  43  43  54  52  61  45  59  46  37  46  51  55  49  46  51  32
##  [379]  60  52  52  36  67  48  40  50  53  61  72  53  42  42  28  75  53  30
##  [397]  50  37  34  41  65  56  47  61  66  56  49  50  47  58  39  45  66  34
##  [415]  59  78  46  27  40  50  48  38  39  47  80  68  35  32  32  53  60  32
##  [433]  54  57  51  65  55  72  50  35  32  73  75  32  47  62  49  57  45  68
##  [451]  66  52  50  41  45  49  55  37  69  39  57  72  63  58  46  52  66  51
##  [469]  70  50  46  41  40  43  33  65  49  44  49  44  48  49  47  46  61  57
##  [487]  45  47  52  69  63  45  66  57  77  59  43  50  72  77  71  60  31  36
##  [505]  65  68  45  32  29  62  40  42  57  62  75  46  60  31  62  77  56  34
##  [523]  30  68  34  35  63  44  57  61  46  52  44  59  39  70  54  54  69  40
##  [541]  44  52  37  39  45  31  57  41  53  48  58  32  50  58  72  69  43  39
##  [559]  51  62  43  41  50  51  58  71  59  70  42  51  47  53  43  51  40  44
##  [577]  53  51  55  65  47  70  54  70  38  50  66  60  66  62  37  46  56  44
##  [595]  49  46  52  52  43  40  60  61  66  73  55  35  48  65  46  56  52  77
##  [613]  48  54  52  47  56  45  43  61  60  50  72  43  66  45  46  61  44  44
##  [631]  51  63  51  61  48  59  69  59  71  50  61  73  62  57  74  50  51  51
##  [649]  44  63  61  36  47  71  66  46  68  63  74  55  58  36  46  41  65  62
##  [667]  47  34  53  50  57  59  51  41  46  47  58  42  36  41  58  54  39  66
##  [685]  54  45  49  39  32  52  61  51  63  69  72  60  26  72  60  61  48  43
##  [703]  43  33  53  35  51  70  73  50  35  56  46  68  39  46  61  43  51  63
##  [721]  56  67  37  56  45  55  66  45  56  30  72  68  43  73  48  72  48  69
##  [739]  32  52  68  65  71  43  57  69  46  26  51  67  49  63  46  52  42  43
##  [757]  49  57  42  62  40  54  33  43  69  66  44  34  56  65  42  70  56  49
##  [775]  64  68  56  63  66  50  47  45  51  49  49  35  61  53  43  45  76  64
##  [793]  71  43  60  59  67  56  48  53  51  65  70  77  63  45  50  64  49  49
##  [811]  40  49  46  66  35  51  45  67  49  43  41  49  60  58  62  62  67  59
##  [829]  63  37  45  29  54  41  54  65  49  41  41  44  71  47  33  40  61  51
##  [847]  30  65  58  41  47  67  64  55  47  42  48  36  55  32  52  52  34  32
##  [865]  46  48  55  35  43  42  38  62  45  40  43  68  50  38  65  43  49  46
##  [883]  46  40  49  51  68  62  59  43  48  56  38  30  78  49  65  63  47  35
##  [901]  63  51  54  48  52  44  60  50  47  35  55  29  58  49  27  67  61  39
##  [919]  67  67  66  28  52  73  35  42  43  39  28  45  47  32  70  32  69  50
##  [937]  50  51  50  46  69  59  38  68  35  34  39  42  44  46  42  45  48  50
##  [955]  45  45  56  69  48  64  54  59  52  46  41  62  48  48  47  41  33  53
##  [973]  33  62  50  59  62  56  69  67  61  56  42  43  71  47  43  52  45  65
##  [991]  67  49  40  32  67  26  43  64  48  65  65  34  40  34  67  67  68  69
## [1009]  53  58  42  45  43  53  50  70  56  52  48  45  48  47  68  55  43  54
## [1027]  43  34  63  57  50  50  52  44  48  41  63  49  50  68  62  67  30  59
## [1045]  35  56  55  61  37  55  56  55  72  51  38  36  47  70  69  34  72  63
## [1063]  34  48  57  60  29  56  65  43  50  55  66  68  63  60  40  45  53  43
## [1081]  56  50  65  39  51  44  51  63  38  63  48  47  62  49  56  42  63  35
## [1099]  39  42  46  42  35  51  49  61  69  51  47  56  67  38  48  61  37  50
## [1117]  38  38  55  43  53  42  48  42  67  59  55  44  36  69  70  54  43  45
## [1135]  36  47  53  59  32  60  68  32  43  59  76  47  40  65  67  35  78  57
## [1153]  42  47  61  58  45  47  60  70  56  65  56  27  47  49  53  53  73  52
## [1171]  25  47  31  64  42  61  69  40  40  30  71  64  60  63  50  36  56  69
## [1189]  59  47  51  52  61  69  52  53  56  52  49  60  44  62  63  42  64  65
## [1207]  56  31  64  56  64  66  49  52  49  46  56  58  30  58  35  48  64  40
## [1225]  63  62  66  35  46  66  50  35  43  38  45  50  65  49  32  69  37  54
## [1243]  59  74  43  50  29  53  48  46  56  69  54  49  58  51  66  46  45  52
## [1261]  48  38  39  61  69  46  58  51  73  63  77  58  49  55  53  50  46  57
## [1279]  43  46  38  50  64  70  70  51  49  52  49  50  65  50  48  44  58  67
## [1297]  43  55  75  43  50  68  36  51  42  55  45  56  39  49  61  65  56  66
## [1315]  32  59  56  48  49  37  61  63  53  44  47  59  39  46  39  76  42  52
## [1333]  59  73  43  46  39  38  69  39  59  44  72  31  39  46  62  38  45  59
## [1351]  65  28  34  56  39  59  43  75  58  50  53  29  47  71  44  63  45  48
## [1369]  53  63  46  61  56  61  52  61  46  36  50  51  66  65  63  57  48  73
## [1387]  49  45  41  42  49  48  49  46  49  65  47  49  53  52  60  49  68  64
## [1405]  51  54  70  68  71  68  44  49  52  77  31  42  55  73  49  48  49  49
## [1423]  55  52  62  51  47  49  63  49  38  48  36  48  54  42  60  35  57  38
## [1441]  69  72  42  73  75  63  50  64  39  48  49  56  68  54  56  52  50  49
## [1459]  51  47  71  62  66  43  35  58  68  69  71  51  51  44  55  52  63  67
## [1477]  65  51  34  60  42  61  40  41  45  45  51  75  66  49  50  71  33  43
## [1495]  31  62  44  52  62  33  46  48  44  48  66  47  49  46  50  63  33  46
## [1513]  56  35  44  41  59  43  49  51  38  50  45  48  48  45  73  63  34  61
## [1531]  46  63  46  46  33  33  32  62  50  54  32  60  43  68  57  35  34  58
## [1549]  64  46  70  56  65  59  47  54  59  45  60  45  38  45  33  67  36  69
## [1567]  57  67  71  64  75  67  30  35  44  61  39  74  29  45  65  74  71  55
## [1585]  34  48  69  56  41  34  59  66  33  56  54  48  61  37  50  46  56  65
## [1603]  66  61  60  58  44  31  48  57  33  43  31  46  44  37  38  34  56  65
## [1621]  56  50  39  62  58  69  50  58  39  45  74  52  62  43  73  66  67  76
## [1639]  52  45  52  57  40  42  40  61  47  40  54  71  39  60  58  44  74  43
## [1657]  67  67  49  40  63  53  45  55  32  36  54  62  51  54  36  70  35  66
## [1675]  51  30  46  42  45  41  43  50  66  38  69  52  41  53  50  32  70  34
## [1693]  36  40  31  69  50  38  64  70  45  66  32  48  48  37  65  56  43  48
## [1711]  35  62  43  55  29  37  65  44  47  45  42  38  30  59  39  42  66  56
## [1729]  60  70  57  62  66  55  63  67  54  58  62  50  57  38  76  74  52  69
## [1747]  74  75  52  72  60  47  41  72  50  42  53  50  66  49  57  42  46  44
## [1765]  35  65  41  39  60  46  47  75  36  69  54  66  36  44  48  37  42  45
## [1783]  53  62  50  54  48  38  42  42  71  58  59  45  69  61  66  44  57  40
## [1801]  45  47  70  55  70  63  55  57  65  47  55  58  52  29  67  38  39  70
## [1819]  65  60  59  50  50  53  43  52  56  35  63  46  34  62  59  47  44  43
## [1837]  51  64  66  67  39  68  53  64  56  46  58  31  55  33  26  46  43  60
## [1855]  37  50  70  45  49  36  55  46  42  50  39  48  53  37  50  44  54  57
## [1873]  49  73  69  34  62  63  70  46  37  52  53  52  36  37  64  50  46  39
## [1891]  33  32  36  53  47  72  43  67  76  71  57  38  34  50  53  67  50  35
## [1909]  62  41  61  51  50  54  58  53  31  40  45  56  44  49  65  78  62  44
## [1927]  52  51  64  56  47  74  39  53  70  46  65  72  62  48  37  71  44  37
## [1945]  45  70  39  47  53  67  81  45  70  45  57  71  45  65  31  71  56  59
## [1963]  43  69  55  74  49  58  72  43  55  38  50  46  50  52  58  53  42  37
## [1981]  37  47  70  59  46  64  43  30  35  65  58  50  74  63  61  41  46  46
## [1999]  48  41  51  54  51  55  49  44  51  57  62  55  63  35  44  40  63  46
## [2017]  38  54  70  49  43  39  65  69  56  42  56  68  72  36  74  66  45  46
## [2035]  37  71  44  70  37  39  30  51  40  42  50  51  62  48  51  74  70  46
## [2053]  62  66  68  56  48  45  71  52  52  40  39  43  47  70  73  69  37  42
## [2071]  72  65  52  34  38  51  44  61  50  67  48  66  61  33  78  52  29  62
## [2089]  41  50  47  52  45  28  72  71  46  65  45  46  45  72  37  47  49  35
## [2107]  59  51  72  60  66  40  68  47  67  52  40  69  59  50  45  45  41  65
## [2125]  64  49  55  35  56  47  45  45  72  41  64  56  52  51  67  55  47  50
## [2143]  53  64  48  64  50  61  48  59  63  49  45  52  45  38  57  46  66  56
## [2161]  57  41  64  72  29  47  67  31  37  40  56  51  46  70  66  46  54  48
## [2179]  50  66  47  49  57  43  64  61  49  65  64  49  38  50  74  64  64  69
## [2197]  47  48  67  46  31  52  48  43  31  53  41  67  58  58  66  40  71  26
## [2215]  45  67  38  73  43  36  42  39  42  58  51  53  42  59  43  49  37  51
## [2233]  35  44  47  54  75  40  65  67
#Adding Age column and then calculating maximum age 
main_df['Age']=Age
max(main_df$Age)
## [1] 128
#Calculating median age 
median(main_df$Age)
## [1] 51
#Calculating average age
mean(main_df$Age)
## [1] 52.1942
#Understanding the income based on Marital_Status
#ggplot(df) +  
#  geom_point(aes(x = Income, y = Marital_Status), color = '#FF0000') + labs(x="Income")
children = main_df %>% filter(Age < 15) %>% summarize(n())
young = main_df %>% filter(15 <= Age & Age <= 25) %>% summarize(n())
middle_aged = main_df %>% filter(25 < Age & Age <= 35) %>% summarize(n())
above_35 = main_df %>% filter( Age > 36) %>% summarize(n())
children = as.integer(children)
young = as.integer(young)
middle_aged = as.integer(middle_aged)
above_35 = as.integer(above_35)

count = c(children,young,middle_aged,above_35)
labels_age = c('Children','Young','Middle Aged','Above 35')
fig1 = plot_ly(labels = ~labels_age, values = ~count, type = 'pie',textposition='inside',
textinfo = 'label+percent',insidetextfont = list(color = '#FFFFFF'))
fig1 = fig1 %>% layout(title = 'Age Group Distribution',
        xaxis = list(showgrid = FALSE), yaxis = list(showgrid = FALSE))
fig1
#Unique Education values
unique(main_df$Education)
## [1] "Graduation" "PhD"        "Master"     "Basic"      "2n Cycle"
#Creating a table for Education 
table_ed = sort(table(main_df$Education),decreasing=T)
per_ed = as.vector(prop.table(table_ed)*100)
labels_ed = c('Graduation','PhD','Master','2n Cycle','Basic')
df_ed = data.frame(labels_ed,per_ed)
df_ed 
fig2 = plot_ly(df_ed, y=~labels_ed,x=~per_ed,type='bar',marker= list(color=c('#001889','#74008D','#E06A5A','#EACA00',"#FF0000")))
fig2 = fig2 %>% layout(title="Percentage of Edcational Level",xaxis = list(title='Percentage'),
yaxis = list(title='Education',categoryorder = "array",
categoryarray = rev(labels_ed)))
fig2
#How many children's customers have?
#Unique values in Kidhome
unique(main_df$Kidhome)
## [1] 0 1 2
#Unique values in Teenhome
unique(main_df$Teenhome)
## [1] 0 1 2
#Merge
main_df['Children']=main_df['Kidhome']+main_df['Teenhome']
#Creating table function for children column
table_cd=sort(table(main_df$Children),decreasing=T)
per_cd = as.vector(prop.table(table_cd)*100)
labels_cd = c('1','0','2','3')
df_cd = data.frame(labels_cd,per_cd)
df_cd
fig3 = plot_ly(df_cd, y=~labels_cd,x=~per_cd,type='bar',orientation='h',
marker= list(color=c('#001889','#74008D','#E06A5A','#EACA00')))
fig3 = fig3 %>% layout(title="Percentage of No of Children",xaxis = list(title='Percentage'),
yaxis = list(title='No of Children',categoryorder = "array",
categoryarray = rev(labels_cd)))
fig3
main_df['Expenses'] = main_df['MntWines'] + main_df['MntFruits'] + main_df['MntMeatProducts'] + 
main_df['MntFishProducts'] + main_df['MntSweetProducts'] + main_df['MntGoldProds']
fig4 = plot_ly(main_df,x=~Expenses,type='histogram',marker = list(color = '#74008D',
line = list(color = '#FFFFFF',width = 1)))
fig4 = fig4 %>% layout(title="Frequency of Customer's Expenses",xaxis=list(title='Expense ($)'),yaxis=list(title='Frequency'),xaxis = list(gridcolor = 'ffff'))
fig4
#Maximum Income
max(main_df$Income)
## [1] 666666
#Average Income
mean(main_df$Income)
## [1] 52237.98
fig5= plot_ly(main_df,x=~Income,type='histogram',marker = list(color = '#EACA00',
line = list(color = '#FFFFFF',width = 1)))
fig5 = fig5 %>% layout(title="Frequency of Income",xaxis=list(title='Income'),yaxis=list(title='Frequency'))
fig5